#!/bin/bash

# Main entry point to be called by SageMaker from training / processing jobs or inference endpoints.
# It is called either automatically from setup.py, when the 'sagemaker-ssh-helper' package is installed
# from 'requirements.txt' and the 'bootstrap_on_start' parameter was passed to the wrapper, or manually
# from a training / processing / inference script, e.g. with subprocess.check_call().

# Abort on the first failing command, including a failure in any stage of a pipeline
set -eo pipefail

# Uncomment below option for debugging
#set -x

# Load the shared helper functions that live in the same directory as this script
dir=$(dirname "$0")
source "${dir}/sm-helper-functions"

# Signal handler: reports the termination on stdout and stops the script with exit status 2.
terminate() {
    printf '%s\n' "sm-setup-ssh: Terminated."
    exit 2
}

# Both SIGTERM and SIGINT are routed to the same handler
trap terminate SIGTERM SIGINT

# In a distributed training or inference job several copies of this script may run at the same time.
# The default flow (at the bottom of this script) re-invokes the script in this mode under an
# exclusive lock, so that the helper scripts are installed without race conditions.
case "$1" in
  install-helper-scripts)
    _install_helper_scripts
    exit 0
    ;;
esac

if [[ "$1" == "configure" ]]; then
  # 'configure' mode: one-time setup of the container. Installs the SSH server and auxiliary tools,
  # redirects rsyslog output, adjusts sshd_config, installs and configures the SSM agent, and finally
  # drops a marker file so that subsequent calls become a no-op.
  # Everything that requires root and Internet goes to 'configure'
  if [[ -f /opt/sagemaker-ssh-helper/.ssh-configured ]]; then
    echo "Already configured. Remove /opt/sagemaker-ssh-helper/.ssh-configured to force reconfiguration."
    exit 0
  fi

  # Best effort: a failure to change the mode of /tmp is reported but does not abort the setup
  chmod 1777 /tmp 2>/dev/null || echo "Cannot fix permissions of /tmp. Probably, already fixed and all is OK"
  mkdir -p ~/.ssh
  # -p creates missing parent directories too (e.g. when /usr/share/man itself is absent)
  # and is a no-op when the directory already exists. A plain mkdir would fail under 'set -e'
  # in the former case and abort the whole configuration.
  mkdir -p /usr/share/man/man1

  if _is_centos; then
    echo "Installing SSH server and auxiliary tools on CentOS"
    yum install -y openssh-server net-tools procps less jq vim rsync perl rsyslog
  else
    export DEBIAN_FRONTEND=noninteractive
    echo 'Retrieving the new lists of Debian packages'
    # A failed package list update is only reported; installation below is still attempted
    apt-get -qq update || echo 'sm-ssh-ide: WARNING - issues with retrieving new lists of packages'
    echo 'Installing Debian package manager utilities'
    apt-get -qq -y install apt-utils
    echo "Installing SSH server and auxiliary tools"
    apt-mark hold python3-distro ssh-import-id  # workaround to make installation work on Trn1
    apt-get -y install ssh net-tools procps less jq vim rsync locales rsyslog

    echo 'Installing locales to work with UTF-8 charsets'
    # Only registers the locales here; generation happens later (see _locale_gen in 'start-ssh')
    echo "C.UTF-8 UTF-8" >> /etc/locale.gen
    echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen
  fi

  # Sending all system logs to CloudWatch (helps to troubleshoot sshd issues)
  if _is_ssh_ide_inside_studio; then
    # Inside Studio: prefix log file names, so that they land under /var/log/apps/
    find /etc/rsyslog.conf /etc/rsyslog.d/ -type f \
      -exec sed -i -e 's~/var/log/~/var/log/apps/app_container.log__~' {} \;
  else
    # Elsewhere: replace every /var/log/... target with the stdout of PID 1
    find /etc/rsyslog.conf /etc/rsyslog.d/ -type f \
      -exec sed -i -e 's~/var/log/.*~/proc/1/fd/1~' {} \;
    # Comment out the privilege drop directives of rsyslog
    # (presumably required to be able to write to /proc/1/fd/1 - TODO confirm)
    # shellcheck disable=SC2016
    sed -i -e 's~^$PrivDropToUser~#$PrivDropToUser~' /etc/rsyslog.conf
    # shellcheck disable=SC2016
    sed -i -e 's~^$PrivDropToGroup~#$PrivDropToGroup~' /etc/rsyslog.conf
  fi

  # For each sshd option below: comment out any existing setting, then append our own value
  sed -i -e 's~^ClientAliveInterval~#ClientAliveInterval~' /etc/ssh/sshd_config
  echo "ClientAliveInterval 15" >> /etc/ssh/sshd_config

  sed -i -e 's~^PermitRootLogin~#PermitRootLogin~' /etc/ssh/sshd_config
  echo PermitRootLogin yes >> /etc/ssh/sshd_config

  sed -i -e 's~^AuthorizedKeysFile~#AuthorizedKeysFile~' /etc/ssh/sshd_config
  echo "AuthorizedKeysFile /etc/ssh/authorized_keys" >> /etc/ssh/sshd_config

  if _is_centos; then
    # Host keys are generated only if the RSA host key is not there yet
    [[ -f /etc/ssh/ssh_host_rsa_key ]] || (echo "Generating new SSH keys" && ssh-keygen -A)
  fi

  _install_sudo
  _install_unzip
  _install_curl
  _install_aws_cli
  _install_ssm_agent
  _install_jq

  # Overwrite the SSM agent configuration with the credentials profile settings
  cat >/etc/amazon/ssm/amazon-ssm-agent.json <<EOF
{
    "Profile":{
        "ShareCreds" : true,
        "ShareProfile" : "ssm",
        "ForceUpdateCreds" : false,
        "KeyAutoRotateDays": 0
    }
}
EOF

  # Activate the logging configuration of the agent from the template shipped with it
  cp /etc/amazon/ssm/seelog.xml.template /etc/amazon/ssm/seelog.xml

  # See https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager-getting-started-ssm-user-permissions.html
  echo "ssm-user ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/ssm-agent-users

  # Marker file checked at the top of this mode: makes repeated 'configure' calls a no-op
  mkdir -p /opt/sagemaker-ssh-helper/
  touch /opt/sagemaker-ssh-helper/.ssh-configured
  exit 0
fi

if [[ "$1" == "start-ssh" ]]; then
  # 'start-ssh' mode: configures the container (no-op if already configured), logs diagnostic
  # information, starts syslog and sshd, initializes SSM and then stays in the foreground
  # running the SSM agent. Always exits with status 1 if the agent stops.
  dir=$(dirname "$0")
  source "$dir"/sm-helper-functions

  sm-setup-ssh configure

  # Log IP addresses of the container (useful only in training in combination with VPC + VPN)
  echo "SSH Helper Log IP: $(hostname -I)"

  # Log OS version
  head -2 /etc/os-release || :

  # Save and dump SageMaker environment for SSH sessions
  sm-save-env

  # Dump container bootstrap environment (PID 1) - can be different from above, useful for debugging
  # Diagnostic output only: a failure here must not prevent SSH from starting ('pipefail' is on)
  ps wwwe -p 1 | tail -1 || :

  _locale_gen
  _start_syslogd
  _start_sshd

  sm-init-ssm

  # Running forever as daemon
  # The exit status is captured with '||', because under 'set -e' a non-zero status of the agent
  # would otherwise terminate the script right here, silently, without the error message below
  agent_status=0
  amazon-ssm-agent || agent_status=$?

  echo "ERROR: agent died (exit status ${agent_status})"
  exit 1  # should never reach this line
fi

# Default flow (no recognized mode was passed in $1).
#
# Every subprocess needs the helper scripts, so each one has to wait until they are fully installed,
# and we cannot tell which of the concurrently started subprocesses will get here first
# (e.g. in distributed training on multiple GPUs). Running the installation under a blocking
# exclusive flock serializes it and prevents race conditions.
log_tag="[sagemaker-ssh-helper][sm-setup-ssh]"
install_lock=/tmp/sm-install-lock
start_lock=/tmp/sm-start-ssh-lock

echo "${log_tag} Acquiring an exclusive blocking lock and installing scripts."
flock "${install_lock}" bash "$0" install-helper-scripts \
  | sed -u "s/^/${log_tag}[install-helper-scripts] /"

# Starting the SSH setup in background:
#  - nohup detaches the child process from the parent
#  - the non-blocking flock prevents more than one setup process from starting
#  - the redirection to /proc/1/fd/1 writes the logs to CloudWatch
#  - sed prepends the SSH Helper prefix to every line of the log output
# With SSH_LOG_TO_STDOUT=true the output is neither prefixed nor redirected, and nohup is not used.
if [[ -f "${start_lock}" ]]; then
  echo "${log_tag} Lock already exists, SSH setup process should be already running in background."
else
  echo "${log_tag} Acquiring a non-blocking lock and starting SSH setup process in background."
  if [[ "$SSH_LOG_TO_STDOUT" != "true" ]]; then
    nohup flock -n "${start_lock}" \
      bash "$0" start-ssh 2>&1 \
        | sed -u "s/^/${log_tag}[start-ssh] /" \
        >/proc/1/fd/1 2>&1 &
  else
    flock -n "${start_lock}" bash "$0" start-ssh &
  fi
  echo "${log_tag} SSH setup process is running in background."
fi

# Wait for SSH_WAIT_TIME_SECONDS (60 by default), prefixing the output of sm-wait
sm-wait "${SSH_WAIT_TIME_SECONDS:-60}" \
  | sed -u "s/^/${log_tag}[sm-wait] /"